#####
# author: Derek Norton
# date: 2024-01-16
# client: Cristalyne Bell, Jon Temte, Maureen Goss, Cecilia He
# purpose: Beta-corona script for sharing
# notes:
# syntax: window
#####

rm(list = ls())

library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
library(lme4)
library(tableone)


fam.set <-
  read.csv('outputs/tables/deident_bcor_family_set_data_for_regression.csv') %>%
  as_tibble
  
ind.set <-
  read.csv('outputs/tables/deident_bcor_individual_set_data_for_SIR_and_regression.csv') %>%
  as_tibble



### functions for diagnostics

pearson_X2 <- function (amod) {
  sum(residuals(amod, type = "pearson") ^ 2)
}
# pearson X^2 statistic; any model

disp_check <- function (amod) {
  pearson_X2(amod) / df.residual(amod)
}
# estimate dispersion parameter; generalized models



### family level set and model

fam.mod <-
  glm(prop_opps_infected ~ memb_per_bdrm + index_sev + index_age, data = fam.set,
    family = binomial, weights = fam_infect_opps)
# no errors or warnings

summary(fam.mod)
# more persons / bedrooms => more spread!

disp_check(fam.mod)
# overdisperse
summary(fam.mod, dispersion = disp_check(fam.mod))
# bedroom items still holds!



### individual level set and model

ind.mod2 <-
  glmer(bcor ~ memb_per_bdrm + index_sev + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial)
# save the index age model; include member age
# no warnings!

summary(ind.mod2)
# subject age sig, with lower age => greater infect likelihood (again, probably
#   due to the student illness conditioning / bias issue)

disp_check(ind.mod2)
# potentially underdisperse...



### univariate items and graphics

fam.set %>%
  ggplot(aes(x = memb_per_bdrm, y = prop_opps_infected)) +
  geom_smooth(method = 'lm') +
  #geom_point(alpha = 0.3) +
  geom_jitter(alpha = 0.3) +
  xlab('Family members per bedroom in household') +
  ylab('Proportion of household members beta-CoV+\n(excluding index cases)')
  # ggtitle('Members-per-bedroom vs Household spread',
  #   subtitle = 'with linear trendline')
ggsave(filename = 'outputs/figures/family bedroom density vs prop of household infected (linear).png',
  width = 6.5, height = 4)
ggsave(filename = 'outputs/figures/Figure 2.tiff', dpi = 1200,
  width = 5.5, height = 3.5)

ind.mod2@frame %>%
  .$bcor %>%
  {binom.test(x = sum(.), n = length(.))}
# SIR and CI



### draft tables

tmp <-
summary(ind.mod2)$coef
tmp %>%
  as_tibble %>%
  mutate(Covar = rownames(tmp)) %>%
  rename(SE = `Std. Error`,
    p_value = `Pr(>|z|)`) %>%
  mutate(OR = exp(Estimate),
    ORlcl = exp(Estimate - 1.96 * SE) %>% round(digits = 2),
    ORucl = exp(Estimate + 1.96 * SE) %>% round(digits = 2),
    OR_CI = paste0(ORlcl, ' \u2014 ', ORucl)) %>%
  select(Covar, Estimate, SE, `z value`, OR, OR_CI, p_value) %>%
  write.csv(file = 'individual model summary.csv')
rm(tmp)
# individual model reg output

tmp <-
  summary(fam.mod, dispersion = disp_check(fam.mod))$coef
tmp %>%
  as_tibble %>%
  mutate(Covar = rownames(tmp)) %>%
  rename(SE = `Std. Error`,
    p_value = `Pr(>|z|)`) %>%
  mutate(OR = exp(Estimate),
    ORlcl = exp(Estimate - 1.96 * SE) %>% round(digits = 2),
    ORucl = exp(Estimate + 1.96 * SE) %>% round(digits = 2),
    OR_CI = paste0(ORlcl, ' \u2014 ', ORucl)) %>%
  select(Covar, Estimate, SE, `z value`, OR, OR_CI, p_value) %>%
  write.csv(file = 'family model summary.csv')
rm(tmp)
# fam model reg output



### time from index to next cases

ind.set %>%
  filter(bcor, !index_case) %>%
  mutate(st_dt = as.Date(st_dt, format = "%m/%d/%Y"),
    first_bcor_dt = as.Date(first_bcor_dt, format = '%m/%d/%Y'),
    ttbcor = as.numeric(st_dt - first_bcor_dt)) %>%
  .$ttbcor %T>%
  {print(summary(.))} %>%
  sd(na.rm = T)




### symptoms exchange for severity in regressions
# fever, cough, rhino, malaise, nascon

s.fam.ls <-
  list(
    fever = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_fever + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),
    
    cough = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_cough + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),
    
    rhino = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_rhino + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),

    malaise = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_malaise + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),

    nascon = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_nascon + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps)
  )
# replace severity with individual symptoms present

lapply(s.fam.ls, function(amod) {summary(amod)$coef[3,]})
# fever sig uncorrected
sapply(s.fam.ls, function(amod) {summary(amod)$coef[3,4]}) %>%
  p.adjust(method = 'BH')
# but doesn't survive correction

s.ind.ls <-
  list(
    fever = glmer(bcor ~ memb_per_bdrm + index_fever + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial),
    
    cough = glmer(bcor ~ memb_per_bdrm + index_cough + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial),
    
    rhino = glmer(bcor ~ memb_per_bdrm + index_rhino + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial),

    malaise = glmer(bcor ~ memb_per_bdrm + index_malaise + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial),

    nascon = glmer(bcor ~ memb_per_bdrm + index_nascon + index_age + age + (1 | recid),
    data = filter(ind.set, !index_case), family = binomial)
  )
# replace severity with individual symptoms present

lapply(s.ind.ls, function(amod) {summary(amod)$coef[3,]})
# all non sig


### regression subject demog tables

ind.set %>%
  select(recid, substudy_id, index_case, age, gender, relationship, memb_per_bdrm,
    bcor, onset) %>%
  filter(!is.na(recid), !is.na(substudy_id), !is.na(index_case), !is.na(age),
    !is.na(gender), !is.na(relationship), !is.na(memb_per_bdrm), !is.na(bcor)) %>%
  filter(recid %in% (fam.mod$data %>% na.omit %>% .$recid),
    !(substudy_id %in% c('528-1', '2156-1'))) %>% 
  mutate(onset = ifelse(bcor, onset, as.numeric(NA)),
    child = age < 18) %>%
  mutate(
    Relationship = case_when(
      relationship %in% c('Father', 'Mother') ~ 'Parent',
      relationship %in% c('Grandfather', 'Grandmother') ~ 'Grandparent',
      relationship == 'Student' ~ 'Student',
      relationship == 'Sibling' ~ 'Sibling',
      relationship == 'Other Adult' ~ 'Other Adult')) %>%
  select(-substudy_id, -relationship) %T>%
  {print(range(.$onset, na.rm = T))} %T>%
  {.$recid %>% unique %>% length %>% print} %T>%
  {print(summary(.))} %>%
  CreateTableOne(data = .) %>%
  {write.csv(print(.), file = 'outputs/tables/individuals model demogs.csv')}
# individ demogs

fam.mod$data %>%
  na.omit %>%
  select(fam_infect_opps, prop_opps_infected, memb_per_bdrm, contains('index_')) %>%
  mutate(index_sev = factor(index_sev)) %>%
  CreateTableOne(data = .) %>%
  {write.csv(print(.), file = 'outputs/tables/family model demogs.csv')}
# family demogs



